//*************************************************************************************************
//
//	Description:
//		SSAO.fx
//
//	<P> Copyright (c) 2009 Blimey! Games Ltd. All rights reserved.
//
//	Author: 
//		Alastair Murray
//
//	History:
//
//	<TABLE>
//		\Author         Date        Version       Description
//		--------        -----       --------      ------------
//		AMurray		    24/03/2009  0.1           Created
//	<TABLE>
//
//*************************************************************************************************

#include "stddefs.fxh"


// AO texture; ApplyAOToScreenPS reads its red channel through AOTexInput.
texture AOTex : TEXTURE;
// Sampler for AOTex: clamped addressing, no mip filtering, point filtered on PS3
// and linearly filtered on every other platform.
// NOTE(review): FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros that are
// not defined in the code visible here (presumably stddefs.fxh) - confirm which states they set.
sampler AOTexInput : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < AOTex >;
	AddressU  = Clamp;
	AddressV  = Clamp;
#ifdef _PS3_
	MinFilter = Point;
	MagFilter = Point;
#else
	MinFilter = Linear;
	MagFilter = Linear;
#endif	
	MipFilter = None;
	SET_NO_ANISOTROPY
};


// Depth texture; ScreenToClipSpace samples it and CalcDepth decodes the texel into a depth value.
texture depthTex : TEXTURE;
// Sampler for depthTex: clamped addressing, no mip filtering, point filtered on PS3
// and linearly filtered on every other platform.
// NOTE(review): FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros that are
// not defined in the code visible here (presumably stddefs.fxh) - confirm which states they set.
sampler depthInputTex : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < depthTex >;
	AddressU  = Clamp;
	AddressV  = Clamp;
#ifdef _PS3_
	MinFilter = Point;
	MagFilter = Point;
#else
	MinFilter = Linear;
	MagFilter = Linear;
#endif	
	MipFilter = None;
	SET_NO_ANISOTROPY
};


// Linear depth texture; the SSAO pixel shaders read its red channel.
// NOTE(review): presumably this is the render target written by ConvertToLinearDepthPS
// (camera-space z) - the binding is done by the application, confirm there.
texture linearDepthTex : TEXTURE;
// Sampler for linearDepthTex: clamped addressing, no mip filtering, point filtered on PS3
// and linearly filtered on every other platform.
// NOTE(review): FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros that are
// not defined in the code visible here (presumably stddefs.fxh) - confirm which states they set.
sampler linearDepthInputTex : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < linearDepthTex >;
	AddressU  = Clamp;
	AddressV  = Clamp;
#ifdef _PS3_
	MinFilter = Point;
	MagFilter = Point;
#else
	MinFilter = Linear;
	MagFilter = Linear;
#endif	
	MipFilter = None;
	SET_NO_ANISOTROPY
};

// Texture of per-pixel vectors; CalcSSAO2_PS and CalcSSAO3_PS fetch its rgb with
// uv*screenSize/4 and expand the value from [0,1] to [-1,1].
texture randomVectorTex : TEXTURE;
// Sampler for randomVectorTex: wrapped addressing (so it tiles), linear min/mag filtering
// on all platforms, no mip filtering.
// NOTE(review): FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros that are
// not defined in the code visible here (presumably stddefs.fxh) - confirm which states they set.
sampler randomVectorInputTex : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < randomVectorTex >;
	AddressU  = Wrap;
	AddressV  = Wrap;
	MinFilter = Linear;
	MagFilter = Linear;
	MipFilter = None;
	SET_NO_ANISOTROPY
};


// Offset and scale applied to the incoming texture coordinate by SSAO_VS_WithViewport
// ( texCoord * viewportScale + viewportOrigin ).
float2 viewportOrigin;
float2 viewportScale;
// Multiplied into uv (then divided by 4) to address the random vector texture in
// CalcSSAO2_PS / CalcSSAO3_PS.
// NOTE(review): presumably the render target size in pixels - confirm against the caller.
float2 screenSize;

// Camera space -> clip space; only read by the disabled CalcSSAO_PS.
float4x4 ProjMat;
// Clip space -> camera space; read by BackProjectDepthValueToCameraSpacePosition.
float4x4 invProjMat;
// Not read by any active code visible here (only mentioned in a commented-out line).
float4	 NearFarZ;

// Number of entries in SpherePos (and loop count in the disabled CalcSSAO_PS).
#define NUM_OCCLUDERS   16

// Sample directions; only read by the disabled CalcSSAO_PS.
float4      SpherePos[NUM_OCCLUDERS];
// Occluder radius / falloff radius pairs; only read by the disabled CalcSSAO_PS.
float4			Radius_Falloff;	// x,y for 1st test   z,w for 2nd test

// Vertex shader input: a position and one texture coordinate set.
struct VSINPUT
{
	float3 position : POSITION;		// copied straight into the output position (w is set to 1) by both vertex shaders
	float2 texCoord : TEXCOORD0;	// texture coordinate handed on to the pixel shader
};

// Vertex shader output / pixel shader input.
struct VSOUTPUT
{
	float4 position : POSITION;		// output position; the vertex shaders here write ( input.xyz, 1 )
	float2 texCoord : TEXCOORD0;	// texture coordinate received by the pixel shaders as 'uv'
};


// Pass-through vertex shader: the input position is emitted unchanged with w = 1
// and the texture coordinate is forwarded as-is.
VSOUTPUT SSAO_VS( VSINPUT _input )
{
	VSOUTPUT result;

	result.texCoord = _input.texCoord;
	result.position = float4( _input.position, 1.0f );

	return result;
}


// Vertex shader used by every technique in this file: the position is emitted unchanged
// with w = 1, and the texture coordinate is remapped by the viewport scale and origin.
VSOUTPUT SSAO_VS_WithViewport( VSINPUT _input )
{
	// Viewport transformation of the incoming tex coord: scale first, then offset
	float2 viewportUV = ( _input.texCoord * viewportScale ) + viewportOrigin;

	VSOUTPUT result;
	result.position = float4( _input.position, 1.0f );
	result.texCoord = viewportUV;

	return result;
}

// Decodes a texel fetched from the depth texture into a single depth value.
//	_PS3_                           : depth is rebuilt from three 8-bit channels (a = high, r = middle, g = low)
//	_DEPTH_FROM_ZBUFFER_ (not _XBOX): depth is rebuilt from the x, y, z channels (x = high ... z = low)
//	otherwise (including _XBOX)     : depth is simply the x channel
float	CalcDepth( float4 depthTex )
{
#if defined( _PS3_ )
	// calc depth using z buffer: recover the three bytes, then weight them into a 24-bit fraction
	const float3 byteWeights = float3(65536.0f / 16777215.0f, 256.0f / 16777215.0f, 1.0f / 16777215.0f);
	float3 depthBytes = round(float3(depthTex.a, depthTex.r, depthTex.g) * 255.0f);
	return dot(depthBytes, byteWeights);
#elif !defined( _XBOX ) && defined( _DEPTH_FROM_ZBUFFER_ )
	// depth packed across three channels
	return (depthTex.x*255.0f/256.0f) + (depthTex.y*255.0f/65536.0f) + (depthTex.z*255.0f/16777216.0f);
#else
	// depth stored directly in the first channel
	return depthTex.x;
#endif
}


// Builds a clip space position (w = 1) for the texel at 'uv':
// x and y come from the texture coordinate, z from the decoded depth texture.
float4 ScreenToClipSpace( float2 uv : TEXCOORD0 ) : COLOR0
{
	// Depth stored for this texel
	float storedDepth = CalcDepth( tex2D( depthInputTex, uv ) );

	// [0,1] texture space -> [-1,1]; v is flipped before the remap
	float clipX = uv.x * 2.0f - 1.0f;
	float clipY = ( 1.0f - uv.y ) * 2.0f - 1.0f;

	return float4( clipX, clipY, storedDepth, 1.0f );
}

// Returns the camera space position (w = 1) of the surface stored in the depth texture at 'uv'.
float4 BackProjectDepthValueToCameraSpacePosition( float2 uv : TEXCOORD0 )
{
	// Clip space position of the texel, taken through the inverse projection
	float4 unprojected = mul( ScreenToClipSpace( uv ), invProjMat );

	// Homogeneous divide
	float3 cameraPos = unprojected.xyz / unprojected.w;

	return float4( cameraPos, 1.0f );
}

// Pixel shader: writes the camera space z of the current texel to r, g and b, with alpha = 1.
COLOUR_OUTPUT_TYPE ConvertToLinearDepthPS( float2 uv : TEXCOORD0 ) : COLOR0
{
	float cameraZ = BackProjectDepthValueToCameraSpacePosition( uv ).z;
	return float4( cameraZ, cameraZ, cameraZ, 1.0f );
}

// Pixel shader: replicates the red channel of the AO texture into r, g and b, with alpha = 1.
COLOUR_OUTPUT_TYPE ApplyAOToScreenPS( float2 uv : TEXCOORD0 ) : COLOR0
{
	float ao = tex2D( AOTexInput, uv ).r;
	return float4( ao, ao, ao, 1.0f );
}


#if 0	// old attempt
// Everything down to the matching #endif is compiled out and kept for reference only.
// No technique visible in this file refers to CalcSSAO_PS.

#define	cos45	0.70710677f
// NOTE(review): declared with NUM_OCCLUDERS (16) elements but only 8 initialisers are listed,
// and CalcSSAO_PS below reads SpherePos rather than this table.
float4 OccluderPoints[NUM_OCCLUDERS] =
{
	float4( cos45,	-cos45,		-cos45,		1 ),
	float4( -cos45,	-cos45,		-cos45,		1 ),
	float4( -cos45,	-cos45,		cos45,		1 ),
	float4( cos45,	-cos45,		cos45,		1 ),
	float4( cos45,	cos45,		-cos45,		1 ),
	float4( -cos45,	cos45,		-cos45,		1 ),
	float4( -cos45,	cos45,		cos45,		1 ),
	float4( cos45,	cos45,		cos45,		1 )
};


// DO_TWO_TESTS is left undefined, so the #if tests below take the single-test path
// (NUM_STEPS = 16 and only Radius_Falloff.x / .y are used).
//#define DO_TWO_TESTS	1

#if DO_TWO_TESTS
#define NUM_STEPS		8
#else
#define NUM_STEPS		16
#endif
 
// Old SSAO pixel shader.
// For every direction in SpherePos and every step along it, a camera space point is projected
// with ProjMat, the linear depth texture is sampled at the projected position and the depth
// difference is accumulated into a weighted average. Only points on the side of the
// approximated surface normal are counted. Texels with clip space z >= 0.9999 return 1.
COLOUR_OUTPUT_TYPE CalcSSAO_PS( float2 uv : TEXCOORD0 ) : COLOR0
{  
    // Get clip space position
    float4 clip_pos = ScreenToClipSpace( uv );

    float fOcclusion = 1.0f;

		// approx the normal from the camera space positions of this texel and two neighbours
		// (the neighbour offset is a fixed 1/512 in texture space)
		float4 cam_space_pos1 = BackProjectDepthValueToCameraSpacePosition( uv );
		float4 cam_space_pos2 = BackProjectDepthValueToCameraSpacePosition( uv+float2(1.0f/512.0f,0.0f) );
		float4 cam_space_pos3 = BackProjectDepthValueToCameraSpacePosition( uv+float2(0.0f,1.0f/512.0f) );
		cam_space_pos2 = cam_space_pos2-cam_space_pos1;
		cam_space_pos3 = cam_space_pos3-cam_space_pos1;
		float3 normal = cross( cam_space_pos2, cam_space_pos3 );
		normal = normalize(normal);
		
		
    if ( clip_pos.z < 0.9999f )
    {
        // Back project the clip space position to camera space
        float4 cam_space_pos = BackProjectDepthValueToCameraSpacePosition( uv );
				
				float fOccluderRadius = Radius_Falloff.x;
				float fFalloffRadius = Radius_Falloff.y;
				#if DO_TWO_TESTS
				float fOccluderRadius2 = Radius_Falloff.z;
				float fFalloffRadius2 = Radius_Falloff.w;
				#endif

        fOcclusion = 0.0f;

				// outside
				/*if(1 )
				{
					#if DO_TWO_TESTS
					fOccluderRadius = 2.0f;		// 0.05 = good inside (0.025 now)		0.125 = good outside (0.075 with new)
					fFalloffRadius = 0.5f;//0.075f/0.25f;
					fOccluderRadius2 = 0.175f;
					fFalloffRadius2 = 0.75f;//0.075f/0.25f;
					#else
					fOccluderRadius = 0.075f;		// 0.05 = good inside (0.025 now)		0.125 = good outside (0.075 with new)
					fFalloffRadius = 0.075f/0.25f;
				fOccluderRadius = 0.075f;		// 0.05 = good inside (0.025 now)		0.125 = good outside (0.075 with new)
				fFalloffRadius = 0.075f/0.25f;
					#endif
				}
				else
				{
					#if DO_TWO_TESTS
					fOccluderRadius = 0.0125f;		// 0.05 = good inside (0.025 now)		0.125 = good outside (0.075 with new)
					fFalloffRadius = 0.075f/0.025f;
					fOccluderRadius2 = 0.1f;
					fFalloffRadius2 = 0.075f/0.025f;
					#else
					fOccluderRadius = 0.025f;		// 0.05 = good inside (0.025 now)		0.125 = good outside (0.075 with new)
					fFalloffRadius = 0.075f/0.025f;
					#endif
				}*/
								
				// sum of the weights accumulated alongside fOcclusion (used to average at the end)
				float samples = 0;
				#if DO_TWO_TESTS
				float fOcclusion2 = 0.0f;
				float samples2 = 0;
				#endif
        for ( int i = 0; i < NUM_OCCLUDERS; i++ )
        {
					for( int j = 0; j < NUM_STEPS; j++ )
					{
						// fraction of the radius for this step: 1/NUM_STEPS .. 1
						float r = (float)(j+1) / (float)NUM_STEPS;
						
						// test 1
						{
							float  r1 = fOccluderRadius * r;
							float3 pt = SpherePos[i].xyz * r1;
							
	            // Calculate occluder points on a sphere around the camera space position
	            float4 vOccluderCameraSpacePosition = float4( cam_space_pos.xyz, 1 ) + float4( pt, 0 );

	            // Project the occluder position to clip space
	            float4 vOccluderClipSpacePosition = mul( vOccluderCameraSpacePosition, ProjMat );
	            vOccluderClipSpacePosition /= vOccluderClipSpacePosition.w;

	            // Convert to texture space		
	            float2 vOccluderTextureSpacePosition = vOccluderClipSpacePosition.xy * 0.5f + 0.5f;
	            vOccluderTextureSpacePosition.y = 1 - vOccluderTextureSpacePosition.y;

	            // Sample the linear depth at this texture space position
	            float fSampledCameraSpaceDepth = tex2D( linearDepthInputTex, vOccluderTextureSpacePosition.xy ).r;
	            
	            // Now we have both depths in linear camera space, so we can calculate the depth difference
	            float fDepthDifference = vOccluderCameraSpacePosition.z - fSampledCameraSpaceDepth;	// pd - rd    if >0 we're occluded
							
							// only count points on the normal's side of the surface
							if( dot( normal, pt.xyz )>0.0f )
							//if( vOccluderCameraSpacePosition.z<=cam_space_pos.z )	// attempt to cut out the 'bad rays' - use true hemisphere instead!
							{						
								//if( fDepthDifference>=0 )
								if( fDepthDifference>fOccluderRadius )
								{
									// depth difference larger than the occluder radius: counted as 1 with full weight
									fOcclusion+=1.0f; samples+=1.0f;
								}
								else
								{
									//fOcclusion += (fDepthDifference>0.0f )? (float)j/(float)NUM_STEPS: 1.0f;
									
									float val;
									//float dd = fDepthDifference/(0.075f/0.25f);//0.075f;	// outside
									float dd = fDepthDifference/fFalloffRadius;//0.075f;	// inside?
									float weight = 1.0f / (1.0f+(dd*dd));
									
									// val*weight below is therefore j/NUM_STEPS*weight when occluded, and exactly 1 otherwise
									val = (fDepthDifference>0.0f )? 
									(float)j/(float)NUM_STEPS
									: 
									1.0f/weight;
									
									//val = lerp( val, 1.0f, saturate((fDepthDifference-0.025f)/0.01f) );	// fades out darkening on faces
									
									fOcclusion += val * weight;
									samples += weight;
								}
							}
						}
					
						#if DO_TWO_TESTS	// test 2
						{
							float  r2 = fOccluderRadius2 * r;
							float3 pt2 = SpherePos[i].xyz * r2;
							
	            // Calculate occluder points on a sphere around the camera space position
	            float4 vOccluderCameraSpacePosition = float4( cam_space_pos.xyz, 1 ) + float4( pt2, 0 );

	            // Project the occluder position to clip space
	            float4 vOccluderClipSpacePosition = mul( vOccluderCameraSpacePosition, ProjMat );
	            vOccluderClipSpacePosition /= vOccluderClipSpacePosition.w;

	            // Convert to texture space		
	            float2 vOccluderTextureSpacePosition = vOccluderClipSpacePosition.xy * 0.5f + 0.5f;
	            vOccluderTextureSpacePosition.y = 1 - vOccluderTextureSpacePosition.y;

	            // Sample the linear depth at this texture space position
	            float fSampledCameraSpaceDepth = tex2D( linearDepthInputTex, vOccluderTextureSpacePosition.xy ).r;
	            
	            // Now we have both depths in linear camera space, so we can calculate the depth difference
	            float fDepthDifference = vOccluderCameraSpacePosition.z - fSampledCameraSpaceDepth;	// pd - rd    if >0 we're occluded
							
							//fDepthDifference *= NearFarZ.y/800.0f;
							
							if( dot( normal, pt2.xyz )>0.0f )
							//if( vOccluderCameraSpacePosition.z<=cam_space_pos.z )	// attempt to cut out the 'bad rays' - use true hemisphere instead!
							{
							
								//if( fDepthDifference<=r1*0.75f )
								if( fDepthDifference<=fOccluderRadius2 )
								{
									//fOcclusion += (fDepthDifference>0.0f )? (float)j/(float)NUM_STEPS: 1.0f;
									
									float val;
									//float dd = fDepthDifference/(0.075f/0.25f);//0.075f;	// outside
									float dd = fDepthDifference/fFalloffRadius2;//0.075f;	// inside?
									float weight = 1.0f / (1.0f+(dd*dd));
									
									val = (fDepthDifference>0.0f )? 
									(float)j/(float)NUM_STEPS
									: 
									1.0f/weight;
									
									fOcclusion2 += val * weight;
									
									samples2 += weight;
								}
							
							}
						}
						#endif
						
					}				
        }

        // Calculate the average of the occlusion samples
        // NOTE(review): 'samples' stays 0 when no point passes the dot( normal, pt ) test,
        // which makes this a divide by zero - guard it if this code is ever re-enabled.
        fOcclusion /= samples;
        fOcclusion = saturate( fOcclusion );
				
				#if DO_TWO_TESTS
        fOcclusion2 /= samples2;
        fOcclusion2 = saturate( fOcclusion2 );
				fOcclusion = min( fOcclusion, fOcclusion2 );
				//fOcclusion = (fOcclusion+fOcclusion2)*0.5f;
				#endif

				// raise to the 4th power, scale by 1.5 and clamp
				fOcclusion *= fOcclusion;
				fOcclusion *= fOcclusion;
				fOcclusion *= 1.5f;
				fOcclusion = saturate( fOcclusion );
								
				fOcclusion = lerp( fOcclusion, 1.0f, saturate(cam_space_pos.z/100.0f) );	// 100 = max dist that SSAO applies to
    }
					
    return float4( fOcclusion, fOcclusion, fOcclusion, 1 );
}

#endif


// Pixel shader used by the CalcSSAO_Old technique.
// Takes 32 samples around the current texel: the eight ( +-1, +-1, +-1 ) diagonals, four times
// over with a radius that grows on every sample. Each offset is rotated by a matrix built from
// the per-pixel random vector, the linear depth texture is sampled at the offset position and
// the sample scores 1 when the stored depth is behind the sample point, 0 when it is in front.
// The score is pulled towards 0.5 as the stored depth gets much nearer than the centre depth.
// Returns the averaged score after a brightening curve, replicated to every output channel.
COLOUR_OUTPUT_TYPE CalcSSAO2_PS( float2 uv : TEXCOORD0 ) : COLOR0
{  
	// get random vector, expanded from [0,1] to [-1,1]
	float2	randomTexCoords = uv*screenSize/4.0f;
	float3	rotVec = 2.0f*tex2D( randomVectorInputTex, randomTexCoords ).rgb-1.0f;
	
	// rot matrix whose last row is rotVec.
	// 1 + rotVec.z is zero when the texel's blue channel is 0 (rotVec.z == -1); the denominator
	// is clamped so that h, and with it the whole matrix, stays finite for such texels.
	float3x3	rotMat;
	float			h = 1.0f / max( 1.0f + rotVec.z, 0.0001f );
	rotMat._m00 =  h*rotVec.y*rotVec.y + rotVec.z;
	rotMat._m01 = -h*rotVec.y*rotVec.x;
	rotMat._m02 = -rotVec.x;
	rotMat._m10 = -h*rotVec.y*rotVec.x;
	rotMat._m11 =  h*rotVec.x*rotVec.x + rotVec.z;
	rotMat._m12 = -rotVec.y;
	rotMat._m20 = rotVec.x;
	rotMat._m21 = rotVec.y;
	rotMat._m22 = rotVec.z;

	// current depth
	float currentDepth = tex2D( linearDepthInputTex, uv.xy ).r;
	
	const int NumSamples = 32;
	float offsetScale = 0.01f*0.333f;							// starting radius
	const float offsetScaleStep = 1.0f + 2.4f/NumSamples;		// radius growth factor per sample
	
	float occlusion = 0.0f;
	
	// samples: NumSamples/8 rings of the 8 sign combinations of x,y,z
	for( int i=0; i<NumSamples/8; i++ )
	for( int x=-1; x<=1; x+=2 )
	for( int y=-1; y<=1; y+=2 )
	for( int z=-1; z<=1; z+=2 )
	{
		// grow the radius, then build the scaled offset vector
		offsetScale *= offsetScaleStep;
		float3 offVec = normalize(float3(x,y,z)) * offsetScale;
		
		// rotate vector
		offVec = mul( offVec, rotMat );
		
		// 3d position of sample point: xy in texture space, z as a depth relative to the centre depth
		float3 samplePt = float3( uv.xy, currentDepth );
		samplePt += float3( offVec.xy, offVec.z*currentDepth*2.0f );
		
		// get new depth (red channel, the same channel currentDepth is read from)
		float sampledDepth = tex2D( linearDepthInputTex, samplePt.xy ).r;
		
		// 0 while the stored depth is at or behind the centre depth, rising to 1 as it gets much nearer
		float valid = saturate( (currentDepth-sampledDepth) / sampledDepth );
		
		// 1 when the stored surface lies behind the sample point, otherwise 0
		float behind = ( sampledDepth>samplePt.z ) ? 1.0f : 0.0f;
		
		// add to occlusion, fading towards the neutral 0.5 for far-away occluders
		occlusion += lerp( behind, 0.5f, valid );
	}
	
	// average
	occlusion /= NumSamples;
	
	// enhance the result
	float result = saturate(occlusion*occlusion + occlusion);
	return result;
}

// Returns direction - 2 * plane * dot( plane, direction ).
// This is a reflection of 'direction' about the plane with normal 'plane' only when 'plane'
// is unit length; the function itself does not normalise it.
float3 mirror( float3 direction, float3 plane ) 
{
	float alongPlane = dot(plane,direction);
	float3 twiceNormalPart = 2.0f * plane * alongPlane;
	return direction - twiceNormalPart;
}

// Pixel shader used by the CalcSSAO technique.
// Takes 32 samples around the current texel: the eight normalised ( +-1, +-1, +-1 ) diagonals,
// four times over with a radius that grows on every sample. Each diagonal is passed through
// mirror() with the per-pixel random vector, the linear depth texture is sampled at the offset
// position and the sample scores 1 when the stored depth is behind the sample point, 0 when it
// is in front. The score is pulled towards 0.5 as the stored depth gets much nearer than the
// centre depth. Returns the averaged score after a brightening curve, replicated to every
// output channel.
// NOTE(review): mirror() is a true reflection only for a unit-length vector; rotVec is used
// exactly as fetched (linearly filtered) - confirm the random texture holds unit vectors.
COLOUR_OUTPUT_TYPE CalcSSAO3_PS( float2 uv : TEXCOORD0 ) : COLOR0
{  
	// get random vector, expanded from [0,1] to [-1,1]
	float2	randomTexCoords = uv*screenSize/4.0f;
	float3	rotVec = (2.0f*tex2D( randomVectorInputTex, randomTexCoords ).rgb)-1.0f;

	// current depth
	float currentDepth = tex2D( linearDepthInputTex, uv.xy ).r;

	const float3	offsetVectors[8] =
	{
		float3( -0.57735f, -0.57735f, -0.57735f ),	// i.e. normalize( -1, -1, -1 )
		float3( -0.57735f, -0.57735f, 0.57735f ),
		float3( -0.57735f, 0.57735f, -0.57735f ),
		float3( -0.57735f, 0.57735f, 0.57735f ),
		float3( 0.57735f, -0.57735f, -0.57735f ),
		float3( 0.57735f, -0.57735f, 0.57735f ),
		float3( 0.57735f, 0.57735f, -0.57735f ),
		float3( 0.57735f, 0.57735f, 0.57735f )
	};
	
	const int NumSamples = 32;	// 8,16,24 = quality
	float offsetScale = 0.02f*0.333f;	// radius
	const float offsetScaleStep = 1.0f + 2.4f/NumSamples;	// 2.4 = falloff
	
	float occlusion = 0.0f;
	
	// samples
	for( int i=0; i<NumSamples/8; i++ )
	for( int j=0; j<8; j++ )	// each combo of x,y,z in positive/negative
	{
		// grow the radius for this sample
		offsetScale *= offsetScaleStep;
		
		// get offset vector, mirror it with the random vector and scale it
		float3 offVec = mirror( offsetVectors[j], rotVec ) * offsetScale;
		
		// 3d position of sample point: xy in texture space, z as a depth relative to the centre depth
		float3 samplePt = float3( uv.xy, currentDepth );
		samplePt += float3( offVec.xy, offVec.z*currentDepth*2.0f );
		
		// get new depth (red channel, the same channel currentDepth is read from)
		float sampledDepth = tex2D( linearDepthInputTex, samplePt.xy ).r;
		
		// 0 while the stored depth is at or behind the centre depth, rising to 1 as it gets much nearer
		float valid = saturate( (currentDepth-sampledDepth) / sampledDepth );
		
		// 1 when the stored surface lies behind the sample point, otherwise 0
		float behind = ( sampledDepth>samplePt.z ) ? 1.0f : 0.0f;
		
		// add to occlusion, fading towards the neutral 0.5 for far-away occluders
		occlusion += lerp( behind, 0.5f, valid );
	}
	
	// average
	occlusion /= NumSamples;
	
	// enhance the result
	float result = saturate(occlusion*occlusion + occlusion*1.1f);	// brightness tweak
	return result;
}



// Runs ConvertToLinearDepthPS, which outputs the camera space z of every texel.
technique ConvertToLinearDepth
{
	pass Pass0
	{
		// depth test, depth write, alpha blending and alpha test are all switched off
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		VertexShader = compile sce_vp_rsx SSAO_VS_WithViewport();
		PixelShader = compile sce_fp_rsx ConvertToLinearDepthPS();
#else		
		VertexShader = compile vs_3_0 SSAO_VS_WithViewport();
		PixelShader = compile ps_3_0 ConvertToLinearDepthPS();
#endif
	}
}

// Runs the earlier SSAO pixel shader, CalcSSAO2_PS (rotation matrix variant).
technique CalcSSAO_Old
{
	pass Pass0
	{
		// depth test, depth write, alpha blending and alpha test are all switched off
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		VertexShader = compile sce_vp_rsx SSAO_VS_WithViewport();
		PixelShader = compile sce_fp_rsx CalcSSAO2_PS();
#else		
		VertexShader = compile vs_3_0 SSAO_VS_WithViewport();
		PixelShader = compile ps_3_0 CalcSSAO2_PS();
#endif
	}
}

// Runs the current SSAO pixel shader, CalcSSAO3_PS (mirrored offset variant).
technique CalcSSAO
{
	pass Pass0
	{
		// depth test, depth write, alpha blending and alpha test are all switched off
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		VertexShader = compile sce_vp_rsx SSAO_VS_WithViewport();
		PixelShader = compile sce_fp_rsx CalcSSAO3_PS();
#else		
		VertexShader = compile vs_3_0 SSAO_VS_WithViewport();
		PixelShader = compile ps_3_0 CalcSSAO3_PS();
#endif
	}
}

// Runs ApplyAOToScreenPS, which outputs the red channel of the AO texture as a grey value.
technique ApplyAOToScreen
{
	pass Pass0
	{
		// depth test, depth write, alpha blending and alpha test are all switched off
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		VertexShader = compile sce_vp_rsx SSAO_VS_WithViewport();
		PixelShader = compile sce_fp_rsx ApplyAOToScreenPS();
#else		
		VertexShader = compile vs_3_0 SSAO_VS_WithViewport();
		PixelShader = compile ps_3_0 ApplyAOToScreenPS();
#endif
	}
}
